Celem analizy jest interpretacja danych finansowych ceclem znalezienia wzorców wpływających na ceny różnych aktyw oraz wykonanie modelu potrafiącego zwrócić korzystne informacje dotyczące ceny złota.
Wśród danych znalazły się:
W celu znalezienia możliwych korelacji w zbiorach dane zostały porównane ze sobą oraz sprawdzona została ich korelacja. W analizie zawarte zostały:
W sekcji poświęconej próbie predykcji korzystnych informacji dotyczących ceny złota stworzony został klasyfikator próbujący przewidzieć dobry moment na kupno i sprzedaż kruszcu.
Errata: Błędy z ggplotly:
Błędy z kable_styling:
library(dplyr)
library(plotly)
library(ggplot2)
library(readxl)
library(tidyr)
library(knitr)
library(kableExtra)
library(TTR)
library(data.table)
library(caret)
library(corrplot)
library(RColorBrewer)
library(randomForest)
set.seed(23)
Gold.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold prices.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 2),
rep("NULL", 4)),
col.names = c('Date',
'Morning.Fix.USD',
'Afternoon.Fix.USD',
rep("NULL", 4)),
header = TRUE)
Gold.prices <- Gold.prices %>%
mutate(Morning.Fix.USD = coalesce(Morning.Fix.USD, Afternoon.Fix.USD),
Afternoon.Fix.USD = coalesce(Afternoon.Fix.USD, Morning.Fix.USD),
USD = (Morning.Fix.USD + Afternoon.Fix.USD) / 2)
Gold.prices.yearly <- Gold.prices %>%
mutate(Year = as.numeric(substr(Date, 1, 4))) %>%
group_by(Year) %>%
summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)
Zbiór zawiera codzienne wyceny złota podczas sesji otwarcia i zamknięcia od dnia 1968-01-02 do 2021-09-29.
head(Gold.prices) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | Morning.Fix.USD | Afternoon.Fix.USD | USD |
|---|---|---|---|
| 2021-09-29 | 1741.65 | 1737.15 | 1739.400 |
| 2021-09-28 | 1739.65 | 1733.75 | 1736.700 |
| 2021-09-27 | 1749.15 | 1755.30 | 1752.225 |
| 2021-09-24 | 1755.15 | 1746.80 | 1750.975 |
| 2021-09-23 | 1771.05 | 1750.00 | 1760.525 |
| 2021-09-22 | 1775.35 | 1773.40 | 1774.375 |
data.frame(nrow(Gold.prices)) %>%
rename("Liczba próbek" = 1) %>%
kable() %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba próbek |
|---|
| 13585 |
summary(Gold.prices) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | Morning.Fix.USD | Afternoon.Fix.USD | USD | |
|---|---|---|---|---|
| Min. :1968-01-02 | Min. : 34.77 | Min. : 34.75 | Min. : 34.76 | |
| 1st Qu.:1981-06-10 | 1st Qu.: 280.50 | 1st Qu.: 280.30 | 1st Qu.: 280.27 | |
| Median :1994-11-14 | Median : 383.30 | Median : 383.45 | Median : 383.38 | |
| Mean :1994-11-16 | Mean : 575.17 | Mean : 574.98 | Mean : 575.07 | |
| 3rd Qu.:2008-04-23 | 3rd Qu.: 841.75 | 3rd Qu.: 838.25 | 3rd Qu.: 841.00 | |
| Max. :2021-09-29 | Max. :2061.50 | Max. :2067.15 | Max. :2058.15 |
Na interaktywnym wykresie widzimy zmianę cen otwarcia w czasie.
p <- ggplot(Gold.prices, aes(x = Date)) +
geom_line(aes(y = USD), color = "gold") +
theme_minimal()
ggplotly(p)
World_Development_Indicators <- read_excel("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/World_Development_Indicators.xlsx",
na = '..',
range = "A1:BC44305")
Country Name, pozostawienie statystyk dla całego swiata w zbiorze krajów,World_Development_Indicators <- World_Development_Indicators %>%
filter(!`Country Name` %in% c("Low & middle income","Low income","Lower middle income","Middle income","Upper middle income","High income"))
World_Development_Indicators.Series_Codes <- select(World_Development_Indicators, `Series Name`, `Series Code`)
World_Development_Indicators <- select(World_Development_Indicators, -`Series Code`)
World_Development_Indicators <- World_Development_Indicators %>%
pivot_longer(cols = `1970 [YR1970]`:`2020 [YR2020]`, names_to = "Year") %>%
group_by(`Series Name`) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = `Series Name`, values_from = value) %>%
select(-row)
World_Development_Indicators <- World_Development_Indicators %>%
mutate(Year = as.numeric(substr(Year, 1, 4)))
country.count <- length(unique(World_Development_Indicators$`Country Name`)) - 1
indicators.count <- World_Development_Indicators %>%
select(-`Country Name`, -`Country Code`, -Year) %>%
ncol
WDI_summary <- data.frame(country.count, indicators.count) %>%
rename("Liczba krajów" = country.count,
"Liczba wskaźników" = indicators.count)
kable(WDI_summary) %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba krajów | Liczba wskaźników |
|---|---|
| 201 | 213 |
| Lista krajów dostępnych w zbiorze |
|---|
| Afghanistan |
| Albania |
| Algeria |
| American Samoa |
| Andorra |
| Angola |
| Antigua and Barbuda |
| Argentina |
| Armenia |
| Aruba |
| Australia |
| Austria |
| Azerbaijan |
| Bahamas, The |
| Bahrain |
| Bangladesh |
| Barbados |
| Belarus |
| Belgium |
| Belize |
| Benin |
| Bermuda |
| Bhutan |
| Bolivia |
| Brazil |
| British Virgin Islands |
| Bulgaria |
| Burundi |
| Cambodia |
| Cameroon |
| Canada |
| Cayman Islands |
| Central African Republic |
| Chad |
| Channel Islands |
| Chile |
| China |
| Colombia |
| Comoros |
| Congo, Dem. Rep. |
| Congo, Rep. |
| Costa Rica |
| Croatia |
| Cuba |
| Curacao |
| Cyprus |
| Czech Republic |
| Denmark |
| Djibouti |
| Dominica |
| Dominican Republic |
| Ecuador |
| Egypt, Arab Rep. |
| El Salvador |
| Equatorial Guinea |
| Eritrea |
| Estonia |
| Eswatini |
| Ethiopia |
| Faroe Islands |
| Fiji |
| Finland |
| France |
| French Polynesia |
| Gabon |
| Gambia, The |
| Georgia |
| Germany |
| Ghana |
| Gibraltar |
| Greece |
| Greenland |
| Grenada |
| Guam |
| Guatemala |
| Guinea |
| Guinea-Bissau |
| Guyana |
| Haiti |
| Honduras |
| Hong Kong SAR, China |
| Hungary |
| Iceland |
| India |
| Indonesia |
| Iran, Islamic Rep. |
| Iraq |
| Ireland |
| Isle of Man |
| Israel |
| Italy |
| Jamaica |
| Japan |
| Jordan |
| Kazakhstan |
| Kenya |
| Kiribati |
| Korea, Dem. People’s Rep. |
| Korea, Rep. |
| Kosovo |
| Kuwait |
| Kyrgyz Republic |
| Lao PDR |
| Latvia |
| Lebanon |
| Lesotho |
| Liberia |
| Libya |
| Liechtenstein |
| Lithuania |
| Luxembourg |
| Macao SAR, China |
| Madagascar |
| Malawi |
| Malaysia |
| Maldives |
| Mali |
| Malta |
| Marshall Islands |
| Mauritania |
| Mauritius |
| Mexico |
| Micronesia, Fed. Sts. |
| Moldova |
| Monaco |
| Mongolia |
| Montenegro |
| Morocco |
| Mozambique |
| Myanmar |
| Namibia |
| Nepal |
| Netherlands |
| New Caledonia |
| New Zealand |
| Nicaragua |
| Niger |
| Nigeria |
| North Macedonia |
| Norway |
| Oman |
| Pakistan |
| Panama |
| Papua New Guinea |
| Paraguay |
| Peru |
| Philippines |
| Poland |
| Portugal |
| Puerto Rico |
| Qatar |
| Romania |
| Russian Federation |
| Rwanda |
| San Marino |
| Sao Tome and Principe |
| Saudi Arabia |
| Senegal |
| Serbia |
| Seychelles |
| Sierra Leone |
| Singapore |
| Sint Maarten (Dutch part) |
| Slovak Republic |
| Slovenia |
| Solomon Islands |
| South Africa |
| South Sudan |
| Spain |
| St. Vincent and the Grenadines |
| Sudan |
| Suriname |
| Sweden |
| Switzerland |
| Syrian Arab Republic |
| Tajikistan |
| Tanzania |
| Thailand |
| Togo |
| Tonga |
| Trinidad and Tobago |
| Tunisia |
| Turkey |
| Turks and Caicos Islands |
| Tuvalu |
| Uganda |
| Ukraine |
| United Arab Emirates |
| United Kingdom |
| United States |
| Uruguay |
| Uzbekistan |
| Vanuatu |
| Venezuela, RB |
| Vietnam |
| Virgin Islands (U.S.) |
| West Bank and Gaza |
| Yemen, Rep. |
| Zambia |
| Zimbabwe |
| Bosnia and Herzegovina |
| World |
| Lista dostępnych wskaźników |
|---|
| Country Name |
| Country Code |
| Year |
| Urban population growth (annual %) |
| Urban population (% of total population) |
| Value lost due to electrical outages (% of sales for affected firms) |
| Urban population |
| Urban land area (sq. km) |
| Unemployment, total (% of total labor force) (national estimate) |
| Unemployment with advanced education (% of total labor force with advanced education) |
| Transport services (% of commercial service exports) |
| Transport services (% of commercial service imports) |
| Trained teachers in upper secondary education (% of total teachers) |
| Trained teachers in secondary education (% of total teachers) |
| Trained teachers in primary education (% of total teachers) |
| Trademark applications, direct nonresident |
| Trade in services (% of GDP) |
| Trade (% of GDP) |
| Trademark applications, direct resident |
| Trademark applications, total |
| Total natural resources rents (% of GDP) |
| Total greenhouse gas emissions (kt of CO2 equivalent) |
| Total greenhouse gas emissions (% change from 1990) |
| Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age) |
| Time required to build a warehouse (days) |
| Time required to enforce a contract (days) |
| Time required to get electricity (days) |
| Taxes on goods and services (current LCU) |
| Taxes on income, profits and capital gains (% of revenue) |
| Taxes on income, profits and capital gains (% of total taxes) |
| Taxes on income, profits and capital gains (current LCU) |
| Taxes on international trade (% of revenue) |
| Taxes on international trade (current LCU) |
| Taxes on goods and services (% value added of industry and services) |
| Taxes on goods and services (% of revenue) |
| Taxes on exports (current LCU) |
| Taxes on exports (% of tax revenue) |
| Taxes less subsidies on products (current US\() </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Taxes less subsidies on products (constant LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Tax revenue (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Tax payments (number) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, female (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Survival to age 65, male (% of cohort) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, female (per 100,000 female population) </td> </tr> <tr> <td style="text-align:left;"> Suicide mortality rate, male (per 100,000 male population) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, turnover ratio of domestic shares (%) </td> </tr> <tr> <td style="text-align:left;"> Stocks traded, total value (current US\)) |
| Stocks traded, total value (% of GDP) |
| Strength of legal rights index (0=weak to 12=strong) |
| Short-term debt (% of total reserves) |
| Short-term debt (% of total external debt) |
| Short-term debt (% of exports of goods, services and primary income) |
| Share of youth not in education, employment or training, female (% of female youth population) |
| Share of youth not in education, employment or training, male (% of male youth population) |
| Share of youth not in education, employment or training, total (% of youth population) |
| Services, value added (% of GDP) |
| Service imports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Service exports (BoP, current US\)) |
| Self-employed, male (% of male employment) (modeled ILO estimate) |
| Self-employed, total (% of total employment) (modeled ILO estimate) |
| Self-employed, female (% of female employment) (modeled ILO estimate) |
| Secure Internet servers |
| Secure Internet servers (per 1 million people) |
| Secondary education, teachers |
| Secondary education, pupils |
| Scientific and technical journal articles |
| School enrollment, tertiary (gross), gender parity index (GPI) |
| S&P Global Equity Indices (annual % change) |
| Rural population growth (annual %) |
| Rural population (% of total population) |
| Rural population |
| Researchers in R&D (per million people) |
| Research and development expenditure (% of GDP) |
| Renewable energy consumption (% of total final energy consumption) |
| Renewable internal freshwater resources per capita (cubic meters) |
| Renewable internal freshwater resources, total (billion cubic meters) |
| Renewable electricity output (% of total electricity output) |
| Real interest rate (%) |
| Pupil-teacher ratio, upper secondary |
| Pupil-teacher ratio, tertiary |
| Pupil-teacher ratio, secondary |
| Pupil-teacher ratio, primary |
| Pupil-teacher ratio, preprimary |
| Rail lines (total route-km) |
| Railways, goods transported (million ton-km) |
| Railways, passengers carried (million passenger-km) |
| Proportion of seats held by women in national parliaments (%) |
| Primary income payments (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Primary income receipts (BoP, current US\)) |
| Primary school starting age (years) |
| Prevalence of undernourishment (% of population) |
| Portfolio investment, net (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Portfolio investment, bonds (PPG + PNG) (NFL, current US\)) |
| Portfolio equity, net inflows (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Population, total </td> </tr> <tr> <td style="text-align:left;"> Population, male </td> </tr> <tr> <td style="text-align:left;"> Population, male (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population, female </td> </tr> <tr> <td style="text-align:left;"> Population living in slums (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in urban agglomerations of more than 1 million </td> </tr> <tr> <td style="text-align:left;"> Population in the largest city (% of urban population) </td> </tr> <tr> <td style="text-align:left;"> Population in largest city </td> </tr> <tr> <td style="text-align:left;"> Population growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> Population density (people per sq. km of land area) </td> </tr> <tr> <td style="text-align:left;"> Population ages 65 and above (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 15-64 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> Population ages 0-14 (% of total population) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, mean annual exposure (micrograms per cubic meter) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 air pollution, population exposed to levels exceeding WHO guideline value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-1 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-2 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> PM2.5 pollution, population exposed to levels exceeding WHO Interim Target-3 value (% of total) </td> </tr> <tr> <td style="text-align:left;"> Part time employment, total (% of total employment) </td> </tr> <tr> <td style="text-align:left;"> Patent applications, nonresidents </td> </tr> <tr> <td style="text-align:left;"> Patent applications, residents </td> </tr> <tr> <td style="text-align:left;"> Number of under-five deaths </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Nitrous oxide emissions in energy sector (% of total) </td> </tr> <tr> <td style="text-align:left;"> Net primary income (Net income from abroad) (current US\)) |
| Net primary income (Net income from abroad) (current LCU) |
| Net primary income (Net income from abroad) (constant LCU) |
| Net primary income (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Net official development assistance received (current US\)) |
| Net official aid received (current US\() </td> </tr> <tr> <td style="text-align:left;"> Net domestic credit (current LCU) </td> </tr> <tr> <td style="text-align:left;"> Net acquisition of financial assets (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Natural gas rents (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Mortality rate, infant (per 1,000 live births) </td> </tr> <tr> <td style="text-align:left;"> Mortality caused by road traffic injury (per 100,000 population) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (% change from 1990) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions (kt of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Methane emissions in energy sector (thousand metric tons of CO2 equivalent) </td> </tr> <tr> <td style="text-align:left;"> Merchandise exports to high-income economies (% of total merchandise exports) </td> </tr> <tr> <td style="text-align:left;"> Manufacturing, value added (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Literacy rate, adult total (% of people ages 15 and above) </td> </tr> <tr> <td style="text-align:left;"> Life expectancy at birth, total (years) </td> </tr> <tr> <td style="text-align:left;"> Lending interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Land area (sq. km) </td> </tr> <tr> <td style="text-align:left;"> Labor force, total </td> </tr> <tr> <td style="text-align:left;"> International tourism, expenditures (current US\)) |
| International migrant stock (% of population) |
| Interest payments (% of expense) |
| Inflation, consumer prices (annual %) |
| Individuals using the Internet (% of population) |
| Income share held by highest 10% |
| Imports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Imports of goods and services (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> ICT goods exports (% of total goods exports) </td> </tr> <tr> <td style="text-align:left;"> Gross savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross national expenditure (current US\)) |
| Gross savings (current US\() </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Gross domestic savings (current US\)) |
| Government expenditure on education, total (% of GDP) |
| Goods exports (BoP, current US\() </td> </tr> <tr> <td style="text-align:left;"> Goods imports (BoP, current US\)) |
| GNI growth (annual %) |
| GDP per capita (current US\() </td> </tr> <tr> <td style="text-align:left;"> GDP per capita growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP growth (annual %) </td> </tr> <tr> <td style="text-align:left;"> GDP (current US\)) |
| Fuel exports (% of merchandise exports) |
| Fuel imports (% of merchandise imports) |
| Food exports (% of merchandise exports) |
| Food imports (% of merchandise imports) |
| External debt stocks (% of GNI) |
| Exports of goods and services (current US\() </td> </tr> <tr> <td style="text-align:left;"> Exports of goods and services (annual % growth) </td> </tr> <tr> <td style="text-align:left;"> Expense (% of GDP) </td> </tr> <tr> <td style="text-align:left;"> Employment in industry (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in services (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employment in agriculture (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Employers, total (% of total employment) (modeled ILO estimate) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (kWh) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from renewable sources, excluding hydroelectric (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from oil, gas and coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from coal sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from hydroelectric sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from natural gas sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Electricity production from nuclear sources (% of total) </td> </tr> <tr> <td style="text-align:left;"> Ease of doing business score (0 = lowest performance to 100 = best performance) </td> </tr> <tr> <td style="text-align:left;"> Diabetes prevalence (% of population ages 20 to 79) </td> </tr> <tr> <td style="text-align:left;"> Deposit interest rate (%) </td> </tr> <tr> <td style="text-align:left;"> Current health expenditure per capita (current US\)) |
| Current health expenditure (% of GDP) |
| Consumer price index (2010 = 100) |
| CO2 emissions from solid fuel consumption (% of total) |
| CO2 emissions from solid fuel consumption (kt) |
| CO2 emissions from transport (% of total fuel combustion) |
| CO2 intensity (kg per kg of oil equivalent energy use) |
| CO2 emissions from residential buildings and commercial and public services (% of total fuel combustion) |
| CO2 emissions from other sectors, excluding residential buildings and commercial and public services (% of total fuel combustion) |
| CO2 emissions from manufacturing industries and construction (% of total fuel combustion) |
| CO2 emissions from liquid fuel consumption (kt) |
| CO2 emissions from liquid fuel consumption (% of total) |
| CO2 emissions from gaseous fuel consumption (kt) |
| CO2 emissions from gaseous fuel consumption (% of total) |
| CO2 emissions from electricity and heat production, total (% of total fuel combustion) |
| CO2 emissions (metric tons per capita) |
| CO2 emissions (kt) |
| CO2 emissions (kg per PPP $ of GDP) |
| CO2 emissions (kg per 2017 PPP $ of GDP) |
| CO2 emissions (kg per 2010 US$ of GDP) |
| Birth rate, crude (per 1,000 people) |
| Bank capital to assets ratio (%) |
| Average number of visits or required meetings with tax officials (for affected firms) |
| Average precipitation in depth (mm per year) |
| Automated teller machines (ATMs) (per 100,000 adults) |
| Account ownership at a financial institution or with a mobile-money-service provider (% of population ages 15+) |
| Access to electricity (% of population) |
SP.Composite <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/S&P Composite.csv")
SP.Composite <- SP.Composite %>%
rename(Date = Year,
CAPE = Cyclically.Adjusted.PE.Ratio) %>%
mutate(Date = as.Date(Date))
plot.data_SP.Composite <- SP.Composite %>%
pivot_longer(2:10) %>%
filter(!is.na(value))
plot.data_SP.Composite %>%
ggplot(aes(x = Date, y = value)) +
geom_line() +
facet_wrap(name ~ ., scales="free", ncol = 3) +
theme_minimal()
head(SP.Composite) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left') %>%
scroll_box( width = '100%')
| Date | S.P.Composite | Dividend | Earnings | CPI | Long.Interest.Rate | Real.Price | Real.Dividend | Real.Earnings | CAPE |
|---|---|---|---|---|---|---|---|---|---|
| 2021-10-31 | 3700.650 | NA | NA | 260.1098 | 0.93 | 3700.650 | NA | NA | 33.73946 |
| 2021-09-30 | 4493.280 | NA | NA | 273.9832 | 1.29 | 4477.204 | NA | NA | 38.34228 |
| 2021-08-31 | 4454.206 | NA | NA | 273.6565 | 1.28 | 4443.570 | NA | NA | 38.09043 |
| 2021-07-31 | 4363.713 | NA | NA | 273.0030 | 1.32 | 4363.713 | NA | NA | 37.44349 |
| 2021-06-30 | 4238.490 | 57.86504 | 158.74 | 271.6960 | 1.52 | 4258.879 | 58.14340 | 159.5036 | 36.69631 |
| 2021-05-31 | 4167.850 | 57.78782 | 148.56 | 269.1950 | 1.62 | 4226.807 | 58.60528 | 150.6615 | 36.55215 |
data.frame(nrow(Gold.prices)) %>%
rename("Liczba próbek" = 1) %>%
kable() %>%
kable_styling(full_width = FALSE, position = 'left')
| Liczba próbek |
|---|
| 13585 |
summary(SP.Composite) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover"), position = 'left') %>%
scroll_box( width = '100%')
| Date | S.P.Composite | Dividend | Earnings | CPI | Long.Interest.Rate | Real.Price | Real.Dividend | Real.Earnings | CAPE | |
|---|---|---|---|---|---|---|---|---|---|---|
| Min. :1871-01-31 | Min. : 2.730 | Min. : 0.1800 | Min. : 0.1600 | Min. : 6.28 | Min. : 0.620 | Min. : 73.9 | Min. : 5.445 | Min. : 4.576 | Min. : 4.784 | |
| 1st Qu.:1908-10-07 | 1st Qu.: 7.902 | 1st Qu.: 0.4202 | 1st Qu.: 0.5608 | 1st Qu.: 10.20 | 1st Qu.: 3.171 | 1st Qu.: 186.6 | 1st Qu.: 9.417 | 1st Qu.: 14.063 | 1st Qu.:11.898 | |
| Median :1946-06-15 | Median : 17.370 | Median : 0.8717 | Median : 1.4625 | Median : 20.35 | Median : 3.815 | Median : 283.3 | Median :14.411 | Median : 23.524 | Median :16.381 | |
| Mean :1946-06-15 | Mean : 327.968 | Mean : 6.7321 | Mean : 15.3714 | Mean : 62.39 | Mean : 4.504 | Mean : 622.0 | Mean :17.498 | Mean : 34.907 | Mean :17.215 | |
| 3rd Qu.:1984-02-21 | 3rd Qu.: 164.400 | 3rd Qu.: 7.0525 | 3rd Qu.: 14.7258 | 3rd Qu.:102.28 | 3rd Qu.: 5.139 | 3rd Qu.: 707.0 | 3rd Qu.:22.301 | 3rd Qu.: 43.768 | 3rd Qu.:20.913 | |
| Max. :2021-10-31 | Max. :4493.280 | Max. :59.6800 | Max. :158.7400 | Max. :273.98 | Max. :15.320 | Max. :4477.2 | Max. :63.511 | Max. :159.504 | Max. :44.198 | |
| NA | NA | NA’s :4 | NA’s :4 | NA | NA | NA | NA’s :4 | NA’s :4 | NA’s :120 |
Currency.Exchange.Rates <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/CurrencyExchangeRates.csv")
Currency.Exchange.Rates <- Currency.Exchange.Rates %>%
mutate(Date = as.Date(Date))
Zbiór zawiera codzienny kurs wymiany walut od dnia 1995-01-02 do 2018-05-02.
Currency.Exchange.Rates %>%
names %>%
data.frame() %>%
rename('Dostępne waluty' = '.') %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left') %>%
scroll_box(width = '400px', height = '400px')
| Dostępne waluty |
|---|
| Date |
| Algerian.Dinar |
| Australian.Dollar |
| Bahrain.Dinar |
| Bolivar.Fuerte |
| Botswana.Pula |
| Brazilian.Real |
| Brunei.Dollar |
| Canadian.Dollar |
| Chilean.Peso |
| Chinese.Yuan |
| Colombian.Peso |
| Czech.Koruna |
| Danish.Krone |
| Euro |
| Hungarian.Forint |
| Icelandic.Krona |
| Indian.Rupee |
| Indonesian.Rupiah |
| Iranian.Rial |
| Israeli.New.Sheqel |
| Japanese.Yen |
| Kazakhstani.Tenge |
| Korean.Won |
| Kuwaiti.Dinar |
| Libyan.Dinar |
| Malaysian.Ringgit |
| Mauritian.Rupee |
| Mexican.Peso |
| Nepalese.Rupee |
| New.Zealand.Dollar |
| Norwegian.Krone |
| Nuevo.Sol |
| Pakistani.Rupee |
| Peso.Uruguayo |
| Philippine.Peso |
| Polish.Zloty |
| Qatar.Riyal |
| Rial.Omani |
| Russian.Ruble |
| Saudi.Arabian.Riyal |
| Singapore.Dollar |
| South.African.Rand |
| Sri.Lanka.Rupee |
| Swedish.Krona |
| Swiss.Franc |
| Thai.Baht |
| Trinidad.And.Tobago.Dollar |
| Tunisian.Dinar |
| U.A.E..Dirham |
| U.K..Pound.Sterling |
| U.S..Dollar |
Bitcoin.prices <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-MKPRU.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'USD'))
Bitcoin.trade.volume <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-TRVOU.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Trade Volume'))
Bitcoin.mine.difficulty <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-DIFF.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Mine difficulty'))
Bitcoin.hash.rate <- read.csv("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Bitcoin/BCHAIN-HRATE.csv",
colClasses = c(rep("Date", 1),
rep("numeric", 1)),
col.names = c('Date',
'Hash rate'))
Bitcoin <- Bitcoin.prices %>%
merge(Bitcoin.trade.volume, by = "Date") %>%
merge(Bitcoin.mine.difficulty, by = "Date") %>%
merge(Bitcoin.hash.rate, by = "Date")
Zestaw danych zawiera codzienne informacje od początku istnienia Bitcoina.
Znaczenie atrybutów: * USD - Bitcoin Market Price USD,Average USD market price across major bitcoin exchanges. * Trade volume - Bitcoin USD Exchange Trade Volume,The total USD value of trading volume on major bitcoin exchanges. * Mine difficulty - Bitcoin Difficulty,A relative measure of how difficult it is to find a new block. The difficulty is adjusted periodically as a function of how much hashing power has been deployed by the network of miners. * Hash rate - Bitcoin Hash Rate,The estimated number of tera hashes per second (trillions of hashes per second) the Bitcoin network is performing.
tail(Bitcoin) %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | USD | Trade.Volume | Mine.difficulty | Hash.rate | |
|---|---|---|---|---|---|
| 4654 | 2021-09-30 | 41522.38 | 221224597 | 1.899764e+13 | 161488615 |
| 4655 | 2021-10-01 | 43757.81 | 360342502 | 1.899764e+13 | 132212901 |
| 4656 | 2021-10-02 | 48140.11 | 688291407 | 1.899764e+13 | 177543039 |
| 4657 | 2021-10-03 | 47727.10 | 184243788 | 1.899764e+13 | 141656680 |
| 4658 | 2021-10-04 | 48205.72 | 183312374 | 1.900912e+13 | 147411968 |
| 4659 | 2021-10-05 | 49143.95 | 370887916 | 1.989305e+13 | 162177736 |
summary(Bitcoin) %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left')
| Date | USD | Trade.Volume | Mine.difficulty | Hash.rate | |
|---|---|---|---|---|---|
| Min. :2009-01-03 | Min. : 0.00 | Min. :0.000e+00 | Min. :0.000e+00 | Min. : 0 | |
| 1st Qu.:2012-03-12 | 1st Qu.: 7.21 | 1st Qu.:1.948e+05 | 1st Qu.:1.689e+06 | 1st Qu.: 12 | |
| Median :2015-05-21 | Median : 431.89 | Median :6.824e+06 | Median :4.881e+10 | Median : 356089 | |
| Mean :2015-05-21 | Mean : 5132.38 | Mean :1.467e+08 | Mean :3.665e+12 | Mean : 26458258 | |
| 3rd Qu.:2018-07-28 | 3rd Qu.: 6496.35 | 3rd Qu.:1.484e+08 | 3rd Qu.:5.364e+12 | 3rd Qu.: 38265984 | |
| Max. :2021-10-05 | Max. :63554.44 | Max. :5.352e+09 | Max. :2.505e+13 | Max. :198514006 |
Odpalić z puszczoną w tle piosenką “Another One Bites the Dust” zespołu Queen.
suicides <- World_Development_Indicators %>%
filter(`Country Name` == 'World') %>%
select(Year, `Suicide mortality rate, female (per 100,000 female population)`, `Suicide mortality rate, male (per 100,000 male population)`) %>%
filter(!is.na(`Suicide mortality rate, female (per 100,000 female population)`))
suicides.male <- suicides %>%
select(Year, `Suicide mortality rate, male (per 100,000 male population)`) %>%
rename(`Suicide mortality rate (per 100,000 of given gender population)` = `Suicide mortality rate, male (per 100,000 male population)`) %>%
mutate(gender = 'male')
suicides.female <- suicides %>%
select(Year, `Suicide mortality rate, female (per 100,000 female population)`) %>%
rename(`Suicide mortality rate (per 100,000 of given gender population)` = `Suicide mortality rate, female (per 100,000 female population)`) %>%
mutate(gender = 'female')
suicides.per.gender <- rbind.data.frame(suicides.male, suicides.female)
p <- suicides.per.gender %>%
ggplot(aes(x = Year, y = `Suicide mortality rate (per 100,000 of given gender population)`)) +
geom_line(aes(colour = gender), size = 1) +
geom_point(colour = 'royalblue', size = 2) +
expand_limits(y = 0) +
ggtitle('Współczynnik samobójstw (na 100,000 osób danej płci)') +
ylab('') +
theme_minimal()
ggplotly(p)
Wniosek: Liczba samobójstw na świecie spada.
gold <- Gold.prices %>%
select(Date, USD) %>%
arrange(desc(row_number())) %>%
filter( Date < '2021-09-29') %>%
filter( Date > '2016-01-01') %>%
rename( USD.gold = USD )
BTC_price <- Bitcoin.prices %>%
arrange(desc(row_number())) %>%
filter( Date < '2021-09-29') %>%
filter( Date > '2016-01-01') %>%
rename( USD.BTC = USD )
gold.btc <- gold %>%
inner_join(BTC_price, by = 'Date')
print(paste("Korelacja złota z ceną Bitcoina: ", round(cor(gold.btc$USD.gold, gold.btc$USD.BTC, use = "complete.obs"))))
## [1] "Korelacja złota z ceną Bitcoina: 1"
coeff.gold.btc <- max(gold.btc$USD.BTC)/max(gold.btc$USD.gold)
gold.btc %>%
ggplot( aes(x = Date) ) +
geom_line( aes(y = USD.gold), color = 'gold' ) +
geom_line( aes(y = USD.BTC/coeff.gold.btc), color = 'orange' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.btc, name="Cena BTC")
) +
ggtitle("Cena złota oraz BTC [USD]") +
theme_minimal() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'orange', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'orange', size=13)
)
Wniosek: Pomimo korelacji na poziomie 0.68 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.
Gold.prices.monthly <- Gold.prices %>%
mutate(Date = substr(Date, 1, 7)) %>%
group_by(Date) %>%
summarize( USD = (mean(Morning.Fix.USD) + mean(Afternoon.Fix.USD)) / 2)
SP.df <- SP.Composite %>%
mutate(Date = substr(Date, 1, 7)) %>%
select(Date, S.P.Composite)
gold.SP <- Gold.prices.monthly %>%
inner_join(SP.df, by = 'Date') %>%
mutate(Date = as.Date(paste0(Date,'-01')))
print(paste("Korelacja złota z indeksem S&P Composite: ", round(cor(gold.SP$USD, gold.SP$S.P.Composite, use = "complete.obs"), 2)))
## [1] "Korelacja złota z indeksem S&P Composite: 0.82"
coeff.gold.SP <- max(gold.SP$S.P.Composite)/max(gold.SP$USD)
gold.SP %>%
ggplot( aes(x = Date) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = S.P.Composite/coeff.gold.SP), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Cena S&P Composite")
) +
ggtitle("Cena złota oraz indeksu S&P Composite [USD]") +
theme_minimal() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
Wniosek: Pomimo korelacji na poziomie 0.82 wizualna inspekcja nie pokazuje jasnej zależności pomiędzy cenami porównywanych aktywów.
USA.WDI <- World_Development_Indicators %>%
filter(`Country Name` == 'United States') %>%
merge(Gold.prices.yearly, by = 'Year') %>%
select_if(~ !any(is.na(.)))
USA.WDI.to_cor <- USA.WDI %>%
select(-(1:3))
num_col=ncol(USA.WDI.to_cor[,-1])
out_indx <- which(upper.tri(diag(num_col)))
cor_cols <- USA.WDI.to_cor %>%
do(melt(cor(.[,-1], use="pairwise.complete.obs"), value.name="cor")[out_indx,])
cor_cols <- cor_cols %>%
filter(Var2 == 'USD') %>%
top_n(15) %>%
arrange(desc(cor)) %>%
rename("top correlations" = cor)
cor_cols %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left')%>%
scroll_box(width = '100%')
| Var1 | Var2 | top correlations |
|---|---|---|
| Net primary income (BoP, current US\() </td> <td style="text-align:left;"> USD </td> <td style="text-align:right;"> 0.9375048 </td> </tr> <tr> <td style="text-align:left;"> Service exports (BoP, current US\)) | USD | 0.9017878 |
| Goods exports (BoP, current US\() </td> <td style="text-align:left;"> USD </td> <td style="text-align:right;"> 0.8963826 </td> </tr> <tr> <td style="text-align:left;"> Net domestic credit (current LCU) </td> <td style="text-align:left;"> USD </td> <td style="text-align:right;"> 0.8947741 </td> </tr> <tr> <td style="text-align:left;"> Primary income receipts (BoP, current US\)) | USD | 0.8925628 |
| Service imports (BoP, current US\() </td> <td style="text-align:left;"> USD </td> <td style="text-align:right;"> 0.8868864 </td> </tr> <tr> <td style="text-align:left;"> Goods imports (BoP, current US\)) | USD | 0.8723380 |
| GDP (current US\() </td> <td style="text-align:left;"> USD </td> <td style="text-align:right;"> 0.8627224 </td> </tr> <tr> <td style="text-align:left;"> Primary income payments (BoP, current US\)) | USD | 0.8473236 |
| Population ages 65 and above (% of total population) | USD | 0.8421186 |
| GDP per capita (current US$) | USD | 0.8418428 |
| Trade in services (% of GDP) | USD | 0.8413228 |
| Population in urban agglomerations of more than 1 million | USD | 0.8346021 |
| Urban population | USD | 0.8250818 |
| Population, male | USD | 0.8234517 |
cor_cols %>% ggplot(aes(x = reorder(Var1, `top correlations`),
y = `top correlations`,
label = sprintf("%0.2f", round(`top correlations`, digits = 2)))) +
geom_bar(position=position_dodge(), stat="identity", colour="darkgrey", fill = 'lightgrey', width = 0.5) +
geom_text(size = 3, hjust = 1.2) +
theme_minimal() +
coord_flip() +
theme(axis.title = element_blank()) +
ggtitle("Top 15 korelacji złota z wskaźnikami WDI USA ")
Wnioski:
Wskaźniki, które mają najwyższą korelację z ceną złota wskazują raczej na ogólny wzrost godspodarczy.
coeff.income_gold.price <- max(USA.WDI$`Net primary income (BoP, current US$)`)/max(USA.WDI$USD)
USA.WDI %>%
ggplot( aes(x = Year) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = `Net primary income (BoP, current US$)`/coeff.income_gold.price), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Net primary income")
) +
ggtitle("Cena złota w porównaniu ze wskaźnikiem przychodu netto w USA") +
theme_minimal() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
coeff.service_export.price <- max(USA.WDI$`Service exports (BoP, current US$)`)/max(USA.WDI$USD)
USA.WDI %>%
ggplot( aes(x = Year) ) +
geom_line( aes(y = USD), color = 'gold' ) +
geom_line( aes(y = `Service exports (BoP, current US$)` / coeff.service_export.price), color = 'royalblue' ) +
scale_y_continuous(
name = "Cena złota",
sec.axis = sec_axis(~.*coeff.gold.SP, name="Service exports")
) +
ggtitle("Cena złota w porównaniu ze wskaźnikiem eksportu usług w USA") +
theme_minimal() +
theme(
axis.title.y = element_text(color = 'gold', size=13),
axis.title.y.right = element_text(color = 'royalblue', size=13),
axis.text.y = element_text(color = 'gold', size=13),
axis.text.y.right = element_text(color = 'royalblue', size=13)
)
Wnioski:
Wskaźniki WDI są podawane rok rocznie. Uważamy, że próba przewidywania ceny złota w ujęciu rocznym ze względu na zbyt małą ilość danych nie będzie wartościowa. Zamiast prób tworzenia regresora przewidującego cenę złota spróbujemy przewidzieć dobry moment na kupno i sprzedaż kruszcu opierając się o wskaźniki analizy technicznej.
Zamiast przewidywać cenę złota stworzymy model próbujący znaleźć we wskaźnikach analizy technicznej dobre momenty do kupna lub sprzedaży złota.
Próba oparta bedzie o strategię średnich kroczących (ang. moving averages - MA). Metoda ta polega na obliczeniu średniej ceny złota z okresu czasu o długości N wstecz.
Poza prostymi średnimi kroczącymi (ang. simple moving averages - SMA) wykorzystana jest również wykładnicza średnia krocząca (ang. exponential moving average - EMA). EMA różni się od SMA tym, że podczas obliczania średniej na wartości nakładane są wykładnicze wagi, które maleją wraz z odległością próbki.
Średnie kroczące pozwalają na wygładzenie szumu z szeregów cenowych uwydatniając w ten sposób trendy. Na podstawie wielu średnich kroczących można spróbować określić dobry moment kupna lub sprzedaży patrząc na ich miejsca przecinania.
Pierwszym krokiem jest ręczne zaznaczenie okresów w których warto było poszerzać lub zawężać ekspozycję swojego portfela inwestycyjnego na złoto.
Gold.prices <- read_excel("C:/Users/alili/Desktop/studia/9 semestr/ZED/projekt/Data pack/Gold_prices_with_marked_exposition.xlsx")
positive.exposition <- Gold.prices$exposition
positive.exposition[Gold.prices$exposition == -1] <- 0
negative.exposition <- Gold.prices$exposition
negative.exposition[Gold.prices$exposition == 1] <- 0
negative.exposition[Gold.prices$exposition == -1] <- 1
Gold.prices <- Gold.prices %>%
mutate('positive.exposition' = positive.exposition,
'negative.exposition' = negative.exposition,)
positive.exposition.count <- data.frame(positive.exposition) %>%
filter(positive.exposition > 0) %>%
count() %>%
rename("positive exposition" = n)
negative.exposition.count <- data.frame(negative.exposition) %>%
filter(negative.exposition > 0) %>%
count() %>%
rename("negative exposition" = n)
data.frame(nrow(Gold.prices) , positive.exposition.count, negative.exposition.count) %>%
rename("liczba próbek" = 1,
"positive exposition" = 2,
"negative exposition" = 3) %>%
kable %>%
kable_styling("striped", full_width = F, position = 'left')
| liczba próbek | positive exposition | negative exposition |
|---|---|---|
| 13585 | 2686 | 1458 |
Liczba miejsc do kupna jest znacznie wieksza niż miejsc do sprzedaży. Jest to spowodowane tym, że wzrost wartości złota jest powolny i jest więcej okazji do kupna, a spadki szybkie, napędzane emocjami inwestorów.
Poza tym widać, że zbiór jest niezbalansowany.
Gold.prices.with.MA <- Gold.prices %>%
arrange(desc(row_number())) %>%
mutate(MA3 = SMA(USD, 3),
MA7 = SMA(USD, 7),
EMA13 = EMA(USD, 13),
MA19 = SMA(USD, 19),
MA50 = SMA(USD, 50),
MA200 = SMA(USD, 200),
MA1095 = SMA(USD, 1095)
) %>%
filter_at(vars(-Date), all_vars(!is.na(.)))
head(Gold.prices.with.MA) %>%
kable() %>%
kable_styling("striped", full_width = F, position = 'left') %>%
scroll_box(width = '100%')
| Date | Morning.Fix.USD | Afternoon.Fix.USD | USD | exposition | positive.exposition | negative.exposition | MA3 | MA7 | EMA13 | MA19 | MA50 | MA200 | MA1095 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1972-05-05 | 50.52 | 51.05 | 50.785 | 1 | 1 | 0 | 50.51167 | 50.18786 | 49.91289 | 49.58105 | 48.8150 | 44.99840 | 39.88142 |
| 1972-05-08 | 51.60 | 52.08 | 51.840 | 1 | 1 | 0 | 51.00000 | 50.53429 | 50.18819 | 49.75763 | 48.8763 | 45.04960 | 39.89664 |
| 1972-05-09 | 54.00 | 53.70 | 53.850 | 1 | 1 | 0 | 52.15833 | 51.14857 | 50.71131 | 50.02000 | 48.9825 | 45.10915 | 39.91371 |
| 1972-05-10 | 53.30 | 53.10 | 53.200 | 1 | 1 | 0 | 52.96333 | 51.56643 | 51.06684 | 50.23132 | 49.0771 | 45.16480 | 39.93020 |
| 1972-05-11 | 53.00 | 53.00 | 53.000 | 1 | 1 | 0 | 53.35000 | 51.91786 | 51.34300 | 50.42342 | 49.1711 | 45.21860 | 39.94651 |
| 1972-05-12 | 52.80 | 52.90 | 52.850 | 1 | 1 | 0 | 53.01667 | 52.27143 | 51.55829 | 50.60579 | 49.2699 | 45.27065 | 39.96268 |
Mmmm, jaki piękny zbiór na wykonanie modelu… już nie mogę się doczekać :3
Na wykresie zauważyć można punkty przecięcia średnich kroczących, które sugerować mogą podjęcie operacji na rynku.
Zbiór jest dzielony wedle chronologii próbek, nie w sposób losowy, aby zbiór testowy nie był podobny do zbioru treningowego.
training_set_percentage <- 80
model.df <- Gold.prices.with.MA %>%
select(-(Date:USD), -positive.exposition, -negative.exposition) %>%
mutate(exposition = as.factor(exposition))
training <- model.df[1:round(nrow(model.df)*training_set_percentage/100),]
testing <- model.df[-(1:round(nrow(model.df)*training_set_percentage/100)),]
stopifnot(nrow(testing) + nrow(training) == nrow(model.df))
set.seed(23)
seeds <- vector(mode = "list", length = 26)
for(i in 1:25) seeds[[i]] <- sample.int(n=1000, 3)
seeds[[26]] <- sample.int(n=1000, 1)
ctrl <- trainControl(seeds = seeds)
fit <- train(exposition ~ .,
data = training,
method = "rf",
trControl = ctrl,
ntree = 20)
rfClasses <- predict(fit, newdata = testing)
confusionMatrix(data = rfClasses, testing$exposition)
## Confusion Matrix and Statistics
##
## Reference
## Prediction -1 0 1
## -1 53 36 0
## 0 425 1396 520
## 1 0 0 68
##
## Overall Statistics
##
## Accuracy : 0.6073
## 95% CI : (0.5878, 0.6265)
## No Information Rate : 0.5733
## P-Value [Acc > NIR] : 0.0003016
##
## Kappa : 0.1264
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: -1 Class: 0 Class: 1
## Sensitivity 0.11088 0.9749 0.11565
## Specificity 0.98218 0.1135 1.00000
## Pos Pred Value 0.59551 0.5963 1.00000
## Neg Pred Value 0.82358 0.7707 0.78601
## Prevalence 0.19135 0.5733 0.23539
## Detection Rate 0.02122 0.5588 0.02722
## Detection Prevalence 0.03563 0.9371 0.02722
## Balanced Accuracy 0.54653 0.5442 0.55782
Wyniki modelu są obiecujące, choć początkowo na to nie wskazują.
Wynik modelu najlepiej będzie ocenić wizualnie wyświetlając miejsca, które model wskazał jako dobre do kupna lub sprzedaży.
indicators.importance <- data.frame(importance(fit$finalModel)) %>%
mutate(names = rownames(.))
indicators.importance %>%
ggplot(aes(x = reorder(names, -MeanDecreaseGini),
y = MeanDecreaseGini)) +
geom_col() +
labs(x = "Atrybuty", y = "Ważność atrybutu") +
ggtitle("Ważności atrybutów") +
theme_minimal()
Model największą wagę przywiązuje do średnich kroczących z dłuższego okresu. Średnie liczone na dłuższym okresie bardziej wygładzają wykres i pokazują ogólny trend zachodzący w kursie. Przecięcia średnich krótszego okresu ze średnimi dłuższego okredu definitywnie pokazują zmianę trendu. Poleganie tylko na średnich długiego okresu nie jest dobre, ponieważ wraz z zwiększaniem okresu liczenia średniej wzrasta opóźnienie ich reakcji w stosunku do zmiany kursu.
Wnioski:
Model jest bardzo zachowawczy w swoich decyzjach i niepewnie decyduje się na inwestycje (pewnie z uwagi na duże przeuczenie - zauważone na sam koniec), jednak gdy już to zrobi robi to przeważnie trafnie. Model wykazuje również tendencję do akumulacji aktywa i niechętnie je odsprzedaje.
Metoda średnich kroczących nie jest skutecznym narzędziem podczas konsolidacji rynku. Zauważyć można, że model w momencie konsolidacji występującej w latach (około 2014 - 2019) nie popełnia wielu błędów i raczej ją przeczekał dokupując na dołkach. Sugeruje to potencjalnie wyższą skuteczność niż tradycyjna metoda.
Utrudnieniem dla klasyfikatora jest fakt, że w ostatnich latach nastąpił znacznie większy ruch kursu złota w porównaniu do lat na których klasyfikator był uczony.
Z eksperymentu widać, że zaproponowana metoda uczenia modelu znajdowania wzorców we wskaźnikach analizy technicznej zapowiada się obiecująco i w dalszych pracach można skupić się na dostrojeniu parametrów modelu oraz poszerzyć wachlarz dostępnych wskaźników. Zastanowić się również można nad dokładniejszym oznaczeniu miejsc kupna i sprzedaży.
Na tej wizualizacji widać przeuczenie modelu, które z uwagi na brak czasu nie zostało poprawione.